# Dummy data: draw a random sample of raw records and load their
# tab-separated fields into a DataFrame.
# NOTE(review): `lines` and `column_names` are defined outside this snippet;
# `lines` is assumed to be a non-empty sequence of UTF-8 encoded bytes
# records -- confirm against the code that builds it.
num_samples = 1000

# One independent draw per sample, so the same index may be picked more than
# once (sampling with replacement). np.random.randint(n) draws from [0, n).
random_lines = [np.random.randint(len(lines)) for _ in range(num_samples)]

# Decode each selected record from bytes to text.
data = [lines[i].decode("utf-8") for i in random_lines]

# Remove newline characters, then split every record on tabs into fields.
processed_data = [_item.replace('\n', '').split('\t') for _item in data]

# One row per sampled record, one column per entry of `column_names`.
data_df = pd.DataFrame(processed_data, columns=column_names)